package com.example.crawler.component;

import com.example.crawler.entity.SchoolInfoEntity;
import com.xiaoleilu.hutool.http.Header;
import com.xiaoleilu.hutool.http.HttpRequest;
import com.xiaoleilu.hutool.http.HttpUtil;
import com.xiaoleilu.hutool.io.FileUtil;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;

import java.util.Random;

/**
 * Downloads school logo images referenced by the gaokaopai.com university listing pages.
 *
 * <p>Each listing page is fetched over HTTP and parsed with jsoup; every image matched by the
 * selector {@code div.pic img} is downloaded on its own thread into {@code OUTPUT_DIR}. The
 * file name is the image's {@code alt} text up to the first full-width opening parenthesis,
 * followed by the extension taken from the image URL.
 *
 * @author 白雨浓
 * date: 17-11-13 11:19 PM
 */
@Component
public class SchoolImgComponent {

    /** Number of listing pages to crawl; pages are numbered starting at 1. */
    private static final int PAGE_COUNT = 306;

    /** How long {@link #start()} sleeps after the last page has been processed. */
    private static final long FINAL_SLEEP_MILLIS = 5000L;

    /** Listing page URL is {@code PAGE_URL_PREFIX + pageNumber + PAGE_URL_SUFFIX}. */
    private static final String PAGE_URL_PREFIX = "http://www.gaokaopai.com/daxue-0-0-0-0-0-0-0--p-";
    private static final String PAGE_URL_SUFFIX = ".html";

    /** Directory the downloaded images are written to. */
    private static final String OUTPUT_DIR = "/home/evil/临时文件夹/school_logo/";

    private static final String USER_AGENT =
            "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:56.0) Gecko/20100101 Firefox/56.0";

    // NOTE(review): this looks like a captured browser session cookie; it has probably expired.
    private static final String COOKIE = "aliyungf_tc=AQAAAIn6I2QLYwQAKUjFtwqf0GNbfEga; PHPSESSID=nrd1m2g6fo4mnf1rq74p1p2t50; SERVERID=70bbdefa3b448ccda1465d71e2b3764b|1510586969|1510586968; Hm_lvt_b160b2fb030578dd4378d9630504cf2c=1510585066; Hm_lpvt_b160b2fb030578dd4378d9630504cf2c=1510586970; _ga=GA1.2.1428926452.1510586970; _gid=GA1.2.880083379.1510586970; _gat=1";

    /** Separator after which the {@code alt} text is discarded (full-width opening parenthesis). */
    private static final String NAME_SEPARATOR = "（";

    /** Shared generator for the random address sent in the {@code X-Forwarded-For} header. */
    private final Random random = new Random();

    /**
     * Processes listing pages 1 through {@code PAGE_COUNT} in order, then sleeps for
     * {@code FINAL_SLEEP_MILLIS} milliseconds before returning.
     *
     * <p>Downloads run on separate threads and may still be in progress when this method returns.
     */
    public void start() {
        for (int page = 1; page <= PAGE_COUNT; page++) {
            handleHtml(page);
        }

        // NOTE(review): presumably a grace period for in-flight download threads - confirm.
        try {
            Thread.sleep(FINAL_SLEEP_MILLIS);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the caller can observe the interruption.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Fetches one listing page and starts a download thread for every logo image found on it.
     *
     * <p>Images with an empty {@code src} or without a usable name are skipped.
     *
     * @param index 1-based number of the listing page
     */
    private void handleHtml(int index) {
        Document doc = Jsoup.parse(fetchPage(index));
        Elements images = doc.select("div.pic img");

        for (Element image : images) {
            String src = image.attr("src");
            String schoolName = schoolNameOf(image.attr("alt"));
            if (src.isEmpty() || schoolName.trim().isEmpty()) {
                // Nothing to download, or nothing to name the file after.
                continue;
            }

            String fileName = toSafeFileName(schoolName) + extensionOf(src);
            if (".".equals(fileName) || "..".equals(fileName)) {
                // Would resolve to a directory rather than a file inside OUTPUT_DIR.
                continue;
            }

            new Thread(() -> HttpUtil.downloadFile(src, FileUtil.file(OUTPUT_DIR + fileName)))
                    .start();
            System.out.println(schoolName);
        }
    }

    /**
     * Requests the listing page with browser-like headers and returns the response body.
     *
     * @param index 1-based number of the listing page
     * @return the response body as returned by the HTTP client
     */
    private String fetchPage(int index) {
        String url = PAGE_URL_PREFIX + index + PAGE_URL_SUFFIX;

        return HttpRequest.get(url)
                .header(Header.USER_AGENT, USER_AGENT)
                .header(Header.COOKIE, COOKIE)
                .header("X-Forwarded-For", randomIp())
                .header("Cache-Control", "max-age=0")
                .header("Upgrade-Insecure-Requests", "1")
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                .execute().body();
    }

    /** Builds an address of the form {@code 181.a.b.c} where each of a, b, c is in 0..253. */
    private String randomIp() {
        return "181" + "." + random.nextInt(254) + "." + random.nextInt(254) + "." + random.nextInt(254);
    }

    /**
     * Returns the part of {@code alt} before the first full-width opening parenthesis, or the
     * whole text when there is none.
     *
     * <p>Uses {@code indexOf} rather than {@code split}: splitting a string that consists only of
     * the separator yields an empty array, so indexing element 0 would throw.
     */
    private static String schoolNameOf(String alt) {
        int separator = alt.indexOf(NAME_SEPARATOR);
        return separator >= 0 ? alt.substring(0, separator) : alt;
    }

    /** Replaces path separators and NUL so the name cannot escape the output directory. */
    private static String toSafeFileName(String name) {
        return name.replace('/', '_').replace('\\', '_').replace('\0', '_');
    }

    /**
     * Returns the file extension of the URL's last path segment, including the leading dot, or
     * an empty string when that segment has no dot. Query string and fragment are ignored.
     */
    private static String extensionOf(String src) {
        String path = src;
        int query = path.indexOf('?');
        if (query >= 0) {
            path = path.substring(0, query);
        }
        int fragment = path.indexOf('#');
        if (fragment >= 0) {
            path = path.substring(0, fragment);
        }

        int dot = path.lastIndexOf('.');
        int slash = path.lastIndexOf('/');
        // A dot before the last slash belongs to a directory or the host, not the file name.
        return dot > slash ? path.substring(dot) : "";
    }

}
